library(ggplot2)
library(ggthemes)
library(lattice)
library(plot3D)
library(rriskDistributions)
library(stringr)
require(MASS)
## Loading required package: MASS
##Indicateur de temps
time.ini = Sys.time()
Fonction de lecture des stats résumées. La fonction est prévue pour lire les fichiers de stats résumées dans des répertoires de la forme “Ne100/simu_1” (en modifiant les valeurs de Ne et de simulations). Elle renvoie les résultats sous forme de data frame en ajoutant aux statistiques résumées les colonnes correspondant à l’effectif efficace et la simulation.
#Data frame construction
data.frame.stat = function(seq_ne, max_gen, max_simu){
#Créer matrice vide qui contiendra les stats
mat_stat = matrix(data = NA, nrow = length(seq_ne)*max_gen*max_simu, ncol = 64)
mat_stat = as.data.frame(mat_stat)
#Compteur pour indiquer les lignes à remplir
cpt = 0
for (ne in seq_ne) {
if (dir.exists(str_c("Ne", toString(ne), "/"))) {
dir_ne = str_c("Ne", toString(ne), "/") #Répertoire de taille effective
row_min = cpt*max_gen*max_simu+1 #Ligne min. où on écrit les stats pour le Ne donné
row_max = (cpt+1)*max_gen*max_simu #Ligne max. où on écrit les stats
mat_stat[row_min:row_max,1] = toString(ne) #Ecriture du Ne correspondant pour les stats qui seront écrites
cpt = cpt+1 #Incrémentation du compteur
for (nb_simu in 1:max_simu) {
dir_simu = str_c("simu_", toString(nb_simu), "/")
row_min_simu = (nb_simu-1)*max_gen + row_min
row_max_simu = nb_simu*max_gen + row_min -1
mat_stat[row_min_simu:row_max_simu,2] = nb_simu
file_path = str_c(dir_ne, dir_simu, "final_sumstats.txt")
file_stat = read.table(file_path, header = TRUE) #Lecture fichier stat résumées
#Suppression 1ère colonne, contenant uniquement chiffres pour tri en bash.
mat_stat[row_min_simu:row_max_simu,3:63] = file_stat
mat_stat[row_min_simu:row_max_simu,64] = rep((cpt-1), max_gen)
}
}
}
#Attribution nom colonnes selon appelation stat par MetHis
colnames(mat_stat) = c("Ne", "simu", names(file_stat),"cpt")
#Passage des Ne en facteur
mat_stat$Ne = factor(as.factor(mat_stat$Ne), levels = as.character(seq_ne))
#Passage simulations en entier
mat_stat$simu = as.integer(mat_stat$simu)
#Passage générations en entier
mat_stat$Generation = as.integer(mat_stat$Generation)
#Passage compteur en facteur
mat_stat$cpt = as.integer(mat_stat$cpt)
#Passage stats en double
for (numcol in 4:ncol(mat_stat)) {
mat_stat[,numcol] = as.double(mat_stat[,numcol])
}
return(mat_stat)
}
Dans le cas où l’on travaille sur de très nombreuses simulations correspondant à un effectif efficace, cette fonction permet de calculer les statistiques moyennées pour les valeurs de Ne étudiées.
#Passage en moyenne
data.frame.mean = function(df_stat, max_gen, seq_ne){
lrow = length(seq_ne)
df_mean = matrix(data = NA, nrow = lrow*max_gen, ncol = 62 )
df_mean = as.data.frame(df_mean)
cpt = 0
for (ne in seq_ne) {
df_tmp = df_stat[which(df_stat$Ne == ne),]
row_min = cpt*max_gen+1
row_max = (cpt+1)*max_gen
df_mean[row_min:row_max, 1] = toString(ne)
cpt = cpt+1
for (gen in 0:(max_gen-1) ) {
df_mean[row_min+gen, 2] = gen
vec_tmp = apply(df_tmp[which(df_tmp$Generation == gen), 4:63], 2, mean)
df_mean[row_min+gen, 3:62] = vec_tmp
}
}
colnames(df_mean) = colnames(df_stat)[-2]
df_mean$Ne = factor(as.factor(df_mean$Ne), levels = seq_ne )
return(df_mean)
}
Fonction d’affichage des graphiques en 2D, avec un background blanc et les lignes horizontales de quadrillage affichées. Cette fonction permet d’afficher les lignes de régression ou les lignes reliant les points.
#Plot function
#Affichage d'une stat au cours des générations
plot_stat_gen = function(df, gen, stat, group_col = c(), color_col, titre, ligne = TRUE, legd = TRUE,
size_point = 0.1, line_t = "solid"){
p = ggplot(df, aes(x = gen, y = stat,
group = group_col, color = color_col)) + ggtitle(titre)
if (ligne) {
p = p + geom_point(size = size_point, show.legend = legd)+
geom_line(aes(linetype = line_t), show.legend = legd)
}else{
#smooth de la courbe pour obtenir régression et tempérer variabilité due au TCL
p = p + geom_point(size = size_point, show.legend = legd)+ geom_smooth(show.legend = legd)
}
p = p + theme_classic()
p = p + theme(panel.grid.major.y = element_line(size = 0.5,
linetype = 'solid',
colour = "#BABABA"),
panel.grid.minor.y = element_line(size = 0.25,
linetype = 'solid',
colour = "#CECECE"))
return(p)
}
Cette fonction d’affichage de graphique reprend la précédente, mais sans affficher les points.
#Plot function
#Affichage d'une stat au cours des générations
plot_without_point = function(df, gen, stat, color_col, titre, legd = TRUE){
p = ggplot(df, aes(x = gen, y = stat, color = color_col)) + ggtitle(titre)
#smooth de la courbe pour obtenir régression et tempérer variabilité due au TCL
p = p + geom_smooth(show.legend = legd)
p = p + theme_classic()
p = p + theme(panel.grid.major.y = element_line(size = 0.5,
linetype = 'solid',
colour = "#BABABA"),
panel.grid.minor.y = element_line(size = 0.25,
linetype = 'solid',
colour = "#CECECE"))
#print(p)
return(p)
}
improve_plot_bottle = function(p, vec_abline, lab_col, lab_line, vec_linetype){
for (rg_abl in 1:length(vec_abline)) {
p = p + geom_vline(xintercept = vec_abline[rg_abl], size = 0.4, color = "orange", linetype = "solid")
}
p = p + labs(color = lab_col, linetype = lab_line) + scale_linetype_manual(values=vec_linetype)
return(p)
}
Fonction d’extraction d’une sous-matrice à partir d’une matrice principale en fonction de valeurs choisies de Ne.
extract_sub_mat = function(all_mat, list_ne){
all_rows = c()
for (ne in as.character(list_ne)) {
all_rows = append(all_rows, which(all_mat[,1] == ne))
}
return(all_mat[all_rows,])
}
###Enregistrement des fonctions
print(difftime(Sys.time(), time.ini))
## Time difference of 0.08071566 secs
Calcul des matrices de stats
seq_1024_16384 = 2**seq(10,14,1)
seq_50_1000 = seq(50,1000,1)
seq_tot = sort(c(seq_50_1000, seq_1024_16384))
mat_tot = data.frame.stat(seq_tot, 101, 1)
mat_50_1000 = extract_sub_mat(mat_tot, c(seq(50,200,10), seq(200,1000,100)))
mat_1024_16384 = extract_sub_mat(mat_tot, 2**seq(6,14,1))
mat_64_100_16384 = extract_sub_mat(mat_tot, c(2**seq(6,14,1), 100, 1000) )
#mat_1024_16384_mean = data.frame.mean(mat_1024_16384, 101, seq_1024_16384)
Graphiques obtenus : affichage de l’évolution d’une statistique donnée en fonction des générations colorée selon la Ne initiale.
p_tmp = plot_stat_gen(mat_tot, mat_tot$Generation,
mat_tot$mean.het.adm, mat_tot$Ne, mat_tot$Ne,ligne = T,
"Moyenne htz en fonction des générations\n selon différentes Ne initiales",
legd = FALSE)
p_tmp = p_tmp+geom_hline(yintercept = mat_50_1000$mean.het.s1[1], color = "red")
p_tmp = p_tmp+geom_hline(yintercept = mat_50_1000$mean.het.s2[1], color = "black")
print(p_tmp)
p_tmp = plot_stat_gen(mat_50_1000, mat_50_1000$Generation,
mat_50_1000$mean.het.adm, mat_50_1000$Ne, mat_50_1000$Ne,ligne = T,
"Moyenne htz en fonction des générations\nselon différentes Ne initiales",
legd = TRUE)
p_tmp = p_tmp+geom_hline(yintercept = mat_50_1000$mean.het.s1[1], color = "red")
p_tmp = p_tmp+geom_hline(yintercept = mat_50_1000$mean.het.s2[1], color = "black")
print(p_tmp)
write_cstt_plot = function(name_stat, mat, name_file,min_y, max_y){
num_col = which(colnames(mat) == name_stat)
p = plot_stat_gen(mat, mat$Generation,
mat[,num_col], mat$Ne, mat$Ne,ligne = T,
str_c(name_stat, "en fonction des générations\n selon différentes Ne initiales"))
png(filename = str_c("../../../Images/", name_file, ".png"), width = 1000, height = 800)
print(p+ylim(min_y, max_y)+ labs(color = "Ne") + guides(linetype = FALSE))
dev.off()
ggsave(filename = str_c("../../../Images/", name_file, ".eps"), plot = p)
}
write_cstt_plot("mean.het.adm", mat_64_100_16384, "moyenne_heterozygotie/moy_htz_ne_cst", 0.01, 0.062)
## Saving 7 x 5 in image
write_cstt_plot("Fst.s1.adm", mat_64_100_16384, "Fsts1adm/fst_s1_adm_ne_cst", -0.01, 0.5)
## Saving 7 x 5 in image
write_cstt_plot("mean.F.adm", mat_64_100_16384, "Fadm/f_adm_ne_cst", -0.03, 0.03)
## Warning: Removed 6 rows containing missing values (geom_point).
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Saving 7 x 5 in image
plot_stat_gen(mat_tot, mat_tot$Generation,
mat_tot$Fst.s1.adm, mat_tot$Ne,mat_tot$Ne,
"Fst s1 adm en fonction des générations\n selon différentes Ne initiales",
legd = FALSE)
plot_stat_gen(mat_50_1000, mat_50_1000$Generation,
mat_50_1000$Fst.s1.adm, mat_50_1000$Ne,mat_50_1000$Ne,
"Fst s1 adm en fonction des générations\n selon différentes Ne initiales")
plot_stat_gen(mat_1024_16384, mat_1024_16384$Generation,
mat_1024_16384$Fst.s1.adm, mat_1024_16384$Ne,mat_1024_16384$Ne,ligne = T,
"Fst s1 adm en fonction des générations\n selon différentes Ne initiales")
###Populations à Ne constants
print(difftime(Sys.time(), time.ini))
## Time difference of 2.089165 mins
Réutilisation de la fonction de lecture de stats résumées pour des populations croissantes. Celles-ci sont enregistrées dans des répertoires de la forme “Ne100-XXX/Ne100-1000/simu_1” Fonction de lecture des stats résumées. La fonction est prévue pour lire les fichiers de stats résumées dans des répertoires de la forme “NeXXX/simu_XXX” (avec XXX remplacés par les valeurs de Ne et de simulations). Elle renvoie les résultats sous forme de data frame en ajoutant aux statistiques résumées les colonnes correspondant à l’effectif efficace et la simulation.
data.frame.increase = function(seq_ne_ini, seq_combi){
for (ne_ini in seq_ne_ini) {
if (dir.exists(str_c("Ne", ne_ini, "-XXX/"))) {
dir_ne = str_c("Ne", ne_ini, "-XXX/")
setwd(dir_ne)
motif_detect = str_c("^",ne_ini,"-")
vec_ne_tmp = seq_combi[which(str_detect(seq_combi, motif_detect))]
mat_tmp = data.frame.stat(seq_ne = vec_ne_tmp, max_gen = 101, max_simu = 1)
if (ne_ini == seq_ne_ini[1]) {
mat_pop_inc = mat_tmp
}else{
mat_pop_inc = rbind(mat_pop_inc, mat_tmp)
}
setwd("../")
}
}
return(na.omit(mat_pop_inc))
}
Lecture des data frame pour les combinaison de Ne entre ne0 (ne_ini) et nef (ne_fin). On extrait ensuite les stats pour lesquelles on a un nef de 10000.
setwd("../new_methis_pop_increase_50000_snp/")
seq_ne_ini = seq(50,100,2)
seq_ne_fin = seq(100, 10000, 100)
seq_combi = as.data.frame(outer(seq_ne_ini, seq_ne_fin, FUN="paste", sep="-"))
seq_combi = as.data.frame.vector(unlist(seq_combi))[[1]]
mat_pop_inc = data.frame.increase(seq_ne_ini, seq_combi)
mat_end_10000 = extract_sub_mat(mat_pop_inc,
as.data.frame(outer(seq(50,100,4),
c("10000"),
FUN="paste",
sep="-"))[,1])
plot_stat_gen(mat_pop_inc, mat_pop_inc$Generation,
mat_pop_inc$f3, mat_pop_inc$Ne,mat_pop_inc$Ne,
"Stat en fonction des générations\n selon différentes Ne initiales",
FALSE, legd = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
plot_stat_gen(mat_pop_inc, mat_pop_inc$Generation,
mat_pop_inc$mean.het.adm, mat_pop_inc$Ne,mat_pop_inc$Ne,
"Stat en fonction des générations\n selon différentes Ne initiales",
FALSE, legd = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
plot_stat_gen(mat_end_10000, mat_end_10000$Generation,
mat_end_10000$mean.het.adm, mat_end_10000$Ne,mat_end_10000$Ne,
"Moyenne hétérozygotie en fonction des générations\n selon différentes Ne initiales",
legd = TRUE)
plot_stat_gen(mat_end_10000, mat_end_10000$Generation,
mat_end_10000$f3, mat_end_10000$Ne,mat_end_10000$Ne,
"F3 en fonction des générations\n selon différentes Ne initiales",
legd = TRUE)
###Populations croissantes à Nu aléatoire dans la loi uniforme [0,0.5]
print(difftime(Sys.time(), time.ini))
## Time difference of 5.682296 mins
setwd("../new_methis_pop_increase_50000_snp_Nu_var/")
seq_nu = sprintf("%.2f", seq(0.05, 0.45, 0.05))
seq_ne_ini = seq(50,100,10)
seq_ne_fin = seq(100, 10000, 200)
seq_combi = as.data.frame(outer(seq_ne_ini, seq_ne_fin, FUN="paste", sep="-"))
seq_combi = as.data.frame.vector(unlist(seq_combi))[[1]]
list_df_nu = list()
cpt = 1
for (nu in seq_nu) {
if(dir.exists(str_c("Nu", nu ))){
dir_nu = str_c("Nu", nu, "/")
setwd(dir_nu)
list_df_nu[[cpt]] = data.frame.increase(seq_ne_ini, seq_combi)
cpt = cpt+1
setwd("../")
}
}
for (combi_lst in 1:length(list_df_nu)) {
if (combi_lst == 1) {
mat_pop_nu = list_df_nu[[combi_lst]]
mat_pop_nu = data.frame(mat_pop_nu, Nu = seq_nu[combi_lst])
}else{
mat_tmp = list_df_nu[[combi_lst]]
mat_tmp = data.frame(mat_tmp, Nu = seq_nu[combi_lst])
mat_pop_nu = rbind(mat_pop_nu, mat_tmp)
}
}
mat_pop_nu$Nu = as.factor(mat_pop_nu$Nu)
plot_without_point(mat_pop_nu, mat_pop_nu$Generation,
mat_pop_nu$mean.het.adm, mat_pop_nu$Ne,
"Moyenne hétérozygotie en fonction des générations\n selon différentes Ne initiales\n et couleurs selon nu",
legd = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
plot_without_point(mat_pop_nu, mat_pop_nu$Generation,
mat_pop_nu$mean.het.adm, mat_pop_nu$Nu,
"Moyenne hétérozygotie en fonction des générations\n selon différentes Ne initiales\n et couleurs selon nu",
legd = TRUE)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# plot_stat_gen(mat_pop_nu, mat_pop_nu$Generation,
# mat_pop_nu$mean.het.adm, mat_pop_nu$Ne,
# "Stat en fonction des générations\n selon différentes Ne initiales",
# TRUE, legd = FALSE)
# plot_stat_gen(mat_pop_nu, mat_pop_nu$Generation,
# mat_pop_nu$mean.het.adm, mat_pop_nu$Nu,
# "Stat en fonction des générations\n selon différentes Ne initiales",
# TRUE, legd = FALSE)
for (rg_list in 1:length(list_df_nu)) {
p_tmp = plot_stat_gen(list_df_nu[[rg_list]], list_df_nu[[rg_list]]$Generation,
list_df_nu[[rg_list]]$mean.het.adm, list_df_nu[[rg_list]]$Ne,
list_df_nu[[rg_list]]$Ne,
str_c("Nu = ",seq_nu[rg_list]), TRUE,legd = FALSE)
p_tmp = p_tmp+geom_hline(yintercept = list_df_nu[[rg_list]]$mean.het.s1[1],
color = "red")
p_tmp = p_tmp+geom_hline(yintercept = list_df_nu[[rg_list]]$mean.het.s2[1],
color = "black")
print(p_tmp)
}
for (rg_list in 1:length(list_df_nu)) {
list_ne = list_df_nu[[rg_list]]$Ne[which(list_df_nu[[rg_list]]$Generation == 100)]
list_ne = as.integer(unlist(str_split(list_ne, "-")))
list_ne0 = list_ne[seq(1, length(list_ne), 2)]
list_nef = list_ne[seq(2, length(list_ne), 2)]
mat_tmp_u = list_df_nu[[rg_list]][which(list_df_nu[[rg_list]]$Generation == 100),]
scatter3D(x = list_ne0, y = list_nef, z = mat_tmp_u$mean.het.adm,
bty = "b2", pch = 16, ticktype = "detailed", main = seq_nu[rg_list],
theta = 120, phi = 0, type = "s", cex.lab = 0.6, cex.axis = 0.5,
xlab = "Ne0", ylab = "Nef", zlab = "Stat")
}
# grid.lines = 50
# fit <- loess(mat_tmp_u$mean.het.adm ~ list_nef + list_ne0)
# x.pred <- seq(min(list_ne0), max(list_ne0), length.out = grid.lines)
# y.pred <- seq(min(list_nef), max(list_nef), length.out = grid.lines)
# xy <- expand.grid( x = x.pred, y = y.pred)
# z.pred <- matrix(predict(fit, newdata = xy),
# nrow = grid.lines, ncol = grid.lines)
#
# fitpoints = predict(fit)
# scatter3D(x = list_ne0, y = list_nef, z = mat_tmp_u$mean.het.adm,
# bty = "b2", pch = 16, ticktype = "detailed", main = seq_nu[rg_list],
# theta = 150, phi = 0, type = "s",cex = 0.5, cex.lab = 0.6, cex.axis = 0.5,
# xlab = "Ne0", ylab = "Nef", zlab = "Stat",
# surf = list(x = x.pred, y = y.pred, z = z.pred,
# facets = NA))
for (rg_list in 1:length(list_df_nu)) {
list_ne = list_df_nu[[rg_list]]$Ne[which(list_df_nu[[rg_list]]$Generation == 100)]
list_ne = as.integer(unlist(str_split(list_ne, "-")))
list_ne0 = list_ne[seq(1, length(list_ne), 2)]
list_nef = list_ne[seq(2, length(list_ne), 2)]
mat_tmp_u = list_df_nu[[rg_list]][which(list_df_nu[[rg_list]]$Generation == 100),]
print(wireframe(mat_tmp_u$mean.het.adm ~ list_ne0 * list_nef, data = mat_tmp_u,
drape = TRUE, zlab = "Htz", xlab = "Ne0", ylab = "Nef",
screen = list(z = 40, x = -60),
scales = list(z.ticks=5, arrows=F, col="black", font=1, tck=0.8),
main = seq_nu[rg_list]))
}
Calcul matrice pour connaître Ne à chaque génération.
setwd("../new_methis_pop_increase_50000_snp_Nu_var/")
mat_ne_inc = data.frame()
cpt = 1
for (nu in seq_nu) {
if(dir.exists(str_c("Nu", nu ))){
dir_nu = str_c("Nu", nu, "/")
setwd(dir_nu)
for (ne_ini in seq_ne_ini) {
if (dir.exists(str_c("Ne", ne_ini, "-XXX/"))) {
dir_ne = str_c("Ne", ne_ini, "-XXX/")
setwd(dir_ne)
seq_combi = as.data.frame(outer(ne_ini, seq_ne_fin,
FUN="paste", sep="-"))
seq_combi = as.data.frame.vector(unlist(seq_combi))[[1]]
for (combi in seq_combi) {
if (dir.exists(str_c("Ne", combi))) {
dir_ne_bis = str_c("Ne", combi, "/simu_1/")
setwd(dir_ne_bis)
cpt = cpt + 1
file_tmp = read.table("simu_1.par", header = TRUE)
if (nu == seq_nu[1] && ne_ini==seq_ne_ini[1] && combi == seq_combi[1]) {
mat_ne_inc = as.data.frame(file_tmp[,2])
}else{
mat_ne_inc = cbind(mat_ne_inc, file_tmp[,2])
}
setwd("../../")
}
}
setwd("../")
}
}
setwd("../")
}
}
mat_pop_nu = data.frame(mat_pop_nu,
ne_gen = unlist(mat_ne_inc , use.names = F),
n0 = rep(NA, nrow(mat_pop_nu)),
nf = rep(NA, nrow(mat_pop_nu)) )
for (i in 1:nrow(mat_pop_nu)) {
vec_tmp = as.integer(unlist(str_split(mat_pop_nu$Ne[i], "-")))
mat_pop_nu$n0[i] = vec_tmp[1]
mat_pop_nu$nf[i] = vec_tmp[2]
}
for (rg_nu in seq_nu) {
rg_nu
df_tmp = mat_pop_nu[which(mat_pop_nu$Nu == rg_nu),]
#grid.lines = 30
#fit <- loess(df_tmp$mean.het.adm ~ df_tmp$Generation + as.numeric(df_tmp$ne_gen))
#x.pred <- seq(min(df_tmp$Generation), max(df_tmp$Generation), length.out = grid.lines)
#y.pred <- seq(min(as.numeric(df_tmp$ne_gen)),
# max(as.numeric(df_tmp$ne_gen)), length.out = grid.lines)
#xy <- expand.grid( x = x.pred, y = y.pred)
# z.pred <- matrix(predict(fit, newdata = xy),
# nrow = grid.lines, ncol = grid.lines)
scatter3D(x = df_tmp$Generation, y = df_tmp$ne_gen, z = df_tmp$mean.het.adm,
bty = "b2", pch = 16, ticktype = "detailed", main = seq_nu[rg_nu],
theta = 210, phi = 20, type = "s",cex = 0.2, cex.lab = 0.6, cex.axis = 0.5,
xlab = "Gen", ylab = "Ne", zlab = "Stat")
}
#surf = list(x = x.pred, y = y.pred, z = z.pred,facets = NA)
seq_extract = c("0.05", "0.25", "0.45")
df_extract = mat_pop_nu[which(mat_pop_nu$Nu == seq_extract),]
scatter3D(x = mat_pop_nu$Generation, y = mat_pop_nu$ne_gen,
z = as.numeric(as.character(mat_pop_nu$Nu)),
bty = "b2", pch = 16, ticktype = "detailed",
theta = 210, phi = 50, type = "s", cex.lab = 0.6, cex.axis = 0.5,
xlab = "Gen", ylab = "Ne", zlab = "Nu")
setwd("../new_methis_pop_increase_50000_snp_Nu_var/")
seq_nu_bis = sprintf("%.2f", c(0.01, 0.25, 0.49))
seq_ne_ini = c(100)
seq_ne_fin = c(200, 400, seq(1000, 10000, 1000))
seq_combi = as.data.frame(outer(seq_ne_ini, seq_ne_fin, FUN="paste", sep="-"))
seq_combi = as.data.frame.vector(unlist(seq_combi))[[1]]
df_nu_reduce = data.frame()
for (nu in seq_nu_bis) {
if(dir.exists(str_c("Nu", nu ))){
dir_nu = str_c("Nu", nu, "/")
setwd(dir_nu)
df_tmp = data.frame.increase(seq_ne_ini, seq_combi)
col_nu = as.character(rep(nu, nrow(df_tmp)))
col_n0 = rep(NA, nrow(df_tmp))
col_nf = rep(NA, nrow(df_tmp))
if (nu == seq_nu_bis[1]) {
df_nu_reduce = data.frame(df_tmp, nu = col_nu,
n0 = col_n0, nf = col_nf)
}else{
df_tmp = data.frame(df_tmp, nu = col_nu,
n0 = col_n0, nf = col_nf)
df_nu_reduce = rbind(df_nu_reduce, df_tmp)
}
setwd("../")
}
}
for (i in 1:nrow(df_nu_reduce)) {
vec_tmp = as.integer(unlist(str_split(df_nu_reduce$Ne[i], "-")))
df_nu_reduce$n0[i] = vec_tmp[1]
df_nu_reduce$nf[i] = vec_tmp[2]
}
ne_gen = data.frame()
cpt = 1
for (nu in seq_nu_bis) {
if(dir.exists(str_c("Nu", nu ))){
dir_nu = str_c("Nu", nu, "/")
setwd(dir_nu)
for (ne_ini in seq_ne_ini) {
if (dir.exists(str_c("Ne", ne_ini, "-XXX/"))) {
dir_ne = str_c("Ne", ne_ini, "-XXX/")
setwd(dir_ne)
seq_combi = as.data.frame(outer(ne_ini, seq_ne_fin,
FUN="paste", sep="-"))
seq_combi = as.data.frame.vector(unlist(seq_combi))[[1]]
for (combi in seq_combi) {
if (dir.exists(str_c("Ne", combi))) {
dir_ne_bis = str_c("Ne", combi, "/simu_1/")
setwd(dir_ne_bis)
cpt = cpt + 1
file_tmp = read.table("simu_1.par", header = TRUE)
if (nu == seq_nu_bis[1] && ne_ini==seq_ne_ini[1] && combi == seq_combi[1]) {
ne_gen = as.data.frame(file_tmp[,2])
}else{
ne_gen = rbind(ne_gen, file_tmp[,2])
}
setwd("../../")
}
}
setwd("../")
}
}
setwd("../")
}
}
df_nu_reduce_gen_100 = df_nu_reduce[which(df_nu_reduce$Generation == 100),]
df_nu_reduce$log_mean = log(df_nu_reduce$mean.het.adm)
scatter3D(x = df_nu_reduce$Generation, y = df_nu_reduce$nf,
z = df_nu_reduce$mean.het.adm, colvar = as.numeric(df_nu_reduce$nu),
col = c("#1B9E77", "#D95F02", "#7570B3"),
colkey = list(at = c(0.1, 0.25, 0.4),
addlines = TRUE, length = 1, width = 0.5,
labels = levels(as.factor(df_nu_reduce$nu))),
ticktype = "detailed",bty = "g",pch = 16,
theta = 115, phi = 0, type = "s", cex = 0.5, cex.lab = 0.6, cex.axis = 0.5,
xlab = "generation", ylab = "Nf", zlab = "stat", clab = "U")
# scatter3D(x = df_tmp$Generation, y = vec_tot, z = df_tmp$mean.het.adm,
# bty = "b2", pch = 16, ticktype = "detailed", main = seq_nu[rg_nu],
# theta = 210, phi = 20, type = "s", cex.lab = 0.6, cex.axis = 0.5,
# xlab = "Gen", ylab = "Ne", zlab = "Stat")
seq_ne_ini3 = seq(100,100,0)
seq_multi3 = c(2,4,10,100)
seq_ne_fin3 = seq_multi3*seq_ne_ini3
seq_combi3 = as.data.frame(outer(seq_ne_ini3, seq_ne_fin3, FUN="paste", sep="-"))
seq_combi3 = as.data.frame.vector(unlist(seq_combi3))[[1]]
for (rg in c(1:length(seq_combi3))) {
if (rg == 1) {
mat_pop_nu3 = df_nu_reduce[which(df_nu_reduce$Ne == seq_combi3[rg]),]
}else{
tmp = df_nu_reduce[which(df_nu_reduce$Ne == seq_combi3[rg]),]
mat_pop_nu3 = rbind(mat_pop_nu3, tmp)
}
}
write_increase_plot = function(name_stat, mat, name_file, min_y, max_y){
num_col = which(colnames(mat) == name_stat)
p = plot_stat_gen(df = mat, gen = mat$Generation,
stat = mat[,num_col], group_col = c(),
color_col = mat$Ne, line_t = mat$nu,
titre = str_c(name_stat," en fonction des générations\n selon différentes croissances de populations"),
legd = TRUE)
p = improve_plot_bottle(p =p, vec_abline = c(), lab_col = "ne0-nef",
lab_line = "U", vec_linetype = c("dotted", "longdash", "solid"))
png(filename = str_c("../../../Images/", name_file,".png"), width = 1000, height = 800)
p = p+ylim(min_y, max_y)
print(p)
dev.off()
ggsave(filename = str_c("../../../Images/", name_file, ".eps"), plot = p)
}
write_increase_plot("mean.het.adm", mat_pop_nu3, "moyenne_heterozygotie/moy_htz_pop_inc_nu_var",0.01,0.062)
## Saving 7 x 5 in image
write_increase_plot("Fst.s1.adm", mat_pop_nu3, "Fsts1adm/fst_s1_adm_pop_inc_nu_var", 0, 0.5)
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 2 row(s) containing missing values (geom_path).
## Saving 7 x 5 in image
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 2 row(s) containing missing values (geom_path).
write_increase_plot("mean.F.adm", mat_pop_nu3, "Fadm/f_adm_pop_inc_nu_var", -0.03, 0.03)
## Warning: Removed 1 rows containing missing values (geom_point).
## Saving 7 x 5 in image
## Warning: Removed 1 rows containing missing values (geom_point).
###Populations croissantes avec Nu variable et fixé
print(difftime(Sys.time(), time.ini))
## Time difference of 10.71038 mins
setwd("../new_methis_bottleneck_50000_snp/")
seq_alpha = c(0.1, 0.5, 0.9)
seq_nu_red = c(0.01, 0.25, 0.49)
seq_bottle = seq(10, 90, 10)
seq_ne0 = c(1000)
seq_nef = c(1000)
seq_combi = as.data.frame(outer(seq_ne0, seq_nef, FUN="paste", sep="-"))
seq_combi = as.data.frame.vector(unlist(seq_combi))[[1]]
for (alpha in seq_alpha) {
for (nu in seq_nu_red) {
for (bottle in seq_bottle) {
change_dir = str_c("alpha", alpha, "/Nu", nu, "/bottle",bottle,"/")
setwd(change_dir)
mat_tmp = data.frame.increase(seq_ne0, seq_combi)
col_alpha = rep(alpha, nrow(mat_tmp))
col_nu = rep(nu, nrow(mat_tmp))
col_bottle = rep(bottle, nrow(mat_tmp))
col_bott_u = as.numeric(rep(bottle, nrow(mat_tmp)))/as.numeric(rep(nu, nrow(mat_tmp)))
col_bott_u = ceiling(col_bott_u/1)*1
col_alpha_u = as.numeric(rep(nu, nrow(mat_tmp)))/as.numeric(rep(alpha, nrow(mat_tmp)))
col_alpha_u = ceiling(col_alpha_u/0.001)*0.001
mat_tmp = data.frame(mat_tmp, alpha = col_alpha,
U = col_nu, time_botl = col_bottle,
bott_u = col_bott_u, alpha_u = col_alpha_u)
if (alpha==seq_alpha[1] & nu==seq_nu_red[1] & bottle==seq_bottle[1]) {
mat_bottle = mat_tmp
}else{
mat_bottle = rbind(mat_bottle, mat_tmp)
}
setwd("../../../")
}
}
}
mat_bottle$alpha = as.factor(mat_bottle$alpha)
mat_bottle$U = as.factor(mat_bottle$U)
mat_bottle$time_botl = as.factor(mat_bottle$time_botl)
mat_bottle$cpt = as.factor(mat_bottle$cpt)
mat_bottle$bott_u = as.factor(mat_bottle$bott_u)
mat_bottle$alpha_u = as.factor(mat_bottle$alpha_u)
mat_bottle_time_50 = mat_bottle[which(mat_bottle$time_botl == 50),]
mat_bottle_time_20 = mat_bottle[which(mat_bottle$time_botl == 20),]
mat_bottle_time_80 = mat_bottle[which(mat_bottle$time_botl == 80),]
boucle_plot_bottle = function(mat, vec_stat, name_stat, vec_bott){
for (bott in vec_bott) {
new_mat = mat[which(mat$time_botl == bott),]
new_vec = vec_stat[which(mat$time_botl == bott)]
p = plot_stat_gen(new_mat, new_mat$Generation,
new_vec, c(),
new_mat$alpha, size_point = 0,
str_c(name_stat, " en fonction des générations\nbottleneck ", bott, " N0 1000 Nf 1000"),
TRUE,legd = TRUE, line_t = new_mat$U)
p = improve_plot_bottle(p = p, vec_abline = c(1,bott+1), lab_col = "alpha",
lab_line = "U", vec_linetype = c("solid", "longdash", "dotted"))
print(p)
}
}
plot_without_point(mat_bottle, mat_bottle$Generation,
mat_bottle$mean.het.adm, mat_bottle$alpha,
"Stat en fonction des générations\n selon différents bottleneck",
legd = TRUE)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Moyenne de l’hétérozygotie
boucle_plot_bottle(mat_bottle, mat_bottle$mean.het.adm, "Heterozygotie", c(20,50,80))
Fst
boucle_plot_bottle(mat_bottle, mat_bottle$Fst.s1.adm, "Fst s1 adm", c(20,50,80))
boucle_plot_bottle(mat_bottle, mat_bottle$Fst.s2.adm, "Fst s2 adm", c(20,50,80))
boucle_plot_bottle(mat_bottle, mat_bottle$mean.F.adm, "F mean", c(20,50,80))
mini_mat_bottle = rbind(mat_bottle_time_20, mat_bottle_time_50, mat_bottle_time_80)
p = plot_stat_gen(mat_bottle, mat_bottle$Generation,
mat_bottle$mean.het.adm, c(),
mat_bottle$bott_u, line_t = mat_bottle$alpha,
"Stat en fonction des générations\n selon différents bottleneck",TRUE,
legd = TRUE)
p = p+ theme(legend.position="bottom", legend.box = "horizontal")
print(p)
p = plot_stat_gen(mini_mat_bottle, mini_mat_bottle$Generation,
mini_mat_bottle$mean.het.adm, c(),
mini_mat_bottle$bott_u, line_t = mini_mat_bottle$alpha,
"Stat en fonction des générations\n selon différents bottleneck",TRUE,
legd = TRUE)
improve_plot_bottle(p =p, vec_abline = c(21, 51, 81), lab_col = "tpsbott/U",
lab_line = "alpha", vec_linetype = c("solid", "longdash", "dotted"))
write_bottle_plot = function(name_stat, mat, name_file, min_y, max_y){
num_col = which(colnames(mat) == name_stat)
p = plot_stat_gen(mat, mat$Generation,
mat[,num_col], c(),
mat$alpha_u, line_t = mat$time_botl,
str_c(name_stat, " en fonction des générations\n selon différents bottleneck\n (Ne0 = Nef = 1000)"),TRUE,
legd = TRUE)
p = improve_plot_bottle(p =p, vec_abline = c(21, 51, 81), lab_col = "U/alpha",
lab_line = "tpsbott", vec_linetype = c("dotted", "longdash", "solid"))
png(filename = str_c("../../../Images/",name_file,".png"), width = 1000, height = 800)
p = p+ylim(min_y, max_y)
print(p)
dev.off()
ggsave(filename = str_c("../../../Images/", name_file, ".eps"), plot = p)
}
p = plot_stat_gen(mini_mat_bottle, mini_mat_bottle$Generation,
mini_mat_bottle$mean.het.adm, c(),
mini_mat_bottle$alpha_u, line_t = mini_mat_bottle$time_botl,
"Moyenne He en fonction des générations\n selon différents bottleneck\n (Ne0 = Nef = 1000)",TRUE,
legd = TRUE)
p = improve_plot_bottle(p =p, vec_abline = c(21, 51, 81), lab_col = "U/alpha",
lab_line = "tpsbott", vec_linetype = c("dotted", "longdash", "solid"))
print(p)
p = plot_stat_gen(mini_mat_bottle, mini_mat_bottle$Generation,
mini_mat_bottle$Fst.s1.adm, c(),
mini_mat_bottle$alpha_u, line_t = mini_mat_bottle$time_botl,
"Fst s1 adm en fonction des générations\n selon différents bottleneck\n (Ne0 = Nef = 1000)",TRUE,
legd = TRUE)
p = improve_plot_bottle(p =p, vec_abline = c(21, 51, 81), lab_col = "U/alpha",
lab_line = "tpsbott", vec_linetype = c("dotted", "longdash", "solid"))
print(p)
p = plot_stat_gen(mini_mat_bottle, mini_mat_bottle$Generation,
mini_mat_bottle$mean.adm.props, c(),
mini_mat_bottle$alpha_u, line_t = mini_mat_bottle$time_botl,
"Moyenne F en fonction des générations\n selon différents bottleneck\n (Ne0 = Nef = 1000)",TRUE,
legd = TRUE)
p = improve_plot_bottle(p =p, vec_abline = c(21, 51, 81), lab_col = "U/alpha",
lab_line = "tpsbott", vec_linetype = c("dotted", "longdash", "solid"))
print(p)
write_bottle_plot("mean.het.adm", mini_mat_bottle, "moyenne_heterozygotie/moy_htz_bottleneck",0.01,0.062)
## Saving 7 x 5 in image
write_bottle_plot("Fst.s1.adm", mini_mat_bottle, "Fsts1adm/fst_s1_adm_bottleneck", 0, 0.5)
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Saving 7 x 5 in image
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 row(s) containing missing values (geom_path).
write_bottle_plot("mean.F.adm", mini_mat_bottle, "Fadm/f_adm_bottleneck", -0.03, 0.03)
## Saving 7 x 5 in image
vec_tmp = c()
vec_tmp[which(mat_bottle$alpha == 0.1)] = 2
vec_tmp[which(mat_bottle$alpha == 0.5)] = 3
vec_tmp[which(mat_bottle$alpha == 0.9)] = 4
scatter3D(x = mat_bottle$Generation, y = as.numeric(as.character(mat_bottle$time_botl)),
z = mat_bottle$mean.het.adm, colvar = as.numeric(vec_tmp),
ticktype = "detailed",bty = "b2",pch = 16,
theta = 65, phi = 0, type = "s",cex = 0.3, cex.lab = 0.6, cex.axis = 0.5,
xlab = "generation", ylab = "bottle", zlab = "stat",
col = c("#1B9E77", "#D95F02", "#7570B3"),
colkey = list(at = c(2.3, 3, 3.7),
addlines = TRUE, length = 1, width = 0.5,
labels = c("0.1", "0.5", "0.9")),
clab = "alpha")
# new_mat_bottle = data.frame(mat_bottle_time_80, mean_norm = rep(NA, nrow(mat_bottle_time_80)),
# sd_norm = rep(NA, nrow(mat_bottle_time_80)))
#
# for (line in c(seq(1, nrow(new_mat_bottle), 100), seq(11, nrow(new_mat_bottle), 10) ) ){
# vec_props = c(new_mat_bottle$perc10.adm.props[line],
# new_mat_bottle$perc20.adm.props[line],
# new_mat_bottle$perc30.adm.props[line],
# new_mat_bottle$perc40.adm.props[line],
# new_mat_bottle$perc50.adm.props[line],
# new_mat_bottle$perc60.adm.props[line],
# new_mat_bottle$perc70.adm.props[line],
# new_mat_bottle$perc80.adm.props[line],
# new_mat_bottle$perc90.adm.props[line])
# courbe_props = invisible(get.norm.par(p = seq(0.1, 0.9, 0.1), (vec_props)))
#
# if (!is.na(courbe_props) ){
# cat("\014")
# dev.off()
# }
#
# #cat("\014")
# new_mat_bottle$mean_norm[line] = courbe_props[1]
# new_mat_bottle$sd_norm[line] = courbe_props[2]
# }
# setwd("../../../Images/")
#
# nb_simu = nrow(new_mat_bottle)/101
#
# png(file="example%03d.png", width=600, heigh=400)
#
# for (gen in seq(10,101, 10) ) {
# new_mat_tmp = new_mat_bottle[which(new_mat_bottle$Generation == gen),]
# for (alpha in seq_alpha) {
# new_mat_tmp = new_mat_tmp[which(new_mat_tmp$alpha == alpha),]
# for (u in seq_nu_bis) {
# new_mat_tmp = new_mat_tmp[which(new_mat_tmp$U == u),]
# if (alpha == seq_alpha[1] && u == seq_nu_bis[1]) {
# plot(function(x) dnorm(x,new_mat_tmp$mean_norm,new_mat_tmp$sd_norm),
# 0, 1.2,xlab="x",ylab="",main = gen, col ="red")
# }else{
# if (!identical(new_mat_tmp$mean_norm, numeric(0)) |
# !identical(new_mat_tmp$sd_norm, numeric(0))) {
# curve(function(x) dnorm(x,new_mat_tmp$mean_norm,new_mat_tmp$sd_norm),
# col="red",from=0,to=1.2)
# }
#
# }
# }
# }
# }
#
# dev.off()
#
# system("convert -delay 50 *.png loi_norm.gif")
#
# file.remove(list.files(pattern=".png"))
#fit.perc(p = seq(0.1, 0.9, 0.1), (vec_props))
setwd("../../../Images/")
mat_bottle_time_80_u_0.01 = mat_bottle_time_80[which(mat_bottle_time_80$U == 0.01),]
nb_simu = nrow(mat_bottle_time_80_u_0.01) / 101
png(file="example%03d.png", width=600, heigh=400)
#c(seq(1,10,1), seq(11,101,10))
for (line in c(seq(1,10,1), seq(11,101,10))){
for (alpha in seq_alpha) {
mat_tmp_bott = mat_bottle_time_80_u_0.01[which(mat_bottle_time_80_u_0.01$alpha == alpha),]
vec_x = seq(0,100,10)
vec_y = c(mat_tmp_bott$perc0.adm.props[line],
mat_tmp_bott$perc10.adm.props[line],
mat_tmp_bott$perc20.adm.props[line],
mat_tmp_bott$perc30.adm.props[line],
mat_tmp_bott$perc40.adm.props[line],
mat_tmp_bott$perc50.adm.props[line],
mat_tmp_bott$perc60.adm.props[line],
mat_tmp_bott$perc70.adm.props[line],
mat_tmp_bott$perc80.adm.props[line],
mat_tmp_bott$perc90.adm.props[line],
mat_tmp_bott$perc100.adm.props[line])
if (alpha == seq_alpha[1]) {
hist(vec_y, breaks = vec_y, main = line-1, col = "#A5260A", xlim = c(-0.2,1.2))
# box_tmp = data.frame(perc = vec_x, val = vec_y, alpha = rep(alpha, length(vec_x)))
# plot(vec_y, vec_x, type = "b", col = "#A5260A", main = line-1,
# ylim = c(0,100), xlim =c(-0.5,1.2))
}else{
hist(vec_y, breaks = vec_y, add = TRUE, col = (as.numeric(as.character(mat_tmp_bott$alpha))*10+2))
# box_col = data.frame(perc = vec_x, val = vec_y, alpha = rep(alpha, length(vec_x)))
# box_tmp = rbind(box_tmp, box_col)
# lines(vec_y, vec_x, type = "b", col = (as.numeric(as.character(mat_tmp_bott$alpha))*10+3))
}
}
# boxplot(val~alpha, data = box_tmp, main = line-1, col = c("#FF866A", "#77B5FE","#87E990"))
legend("topleft", title="Alpha",names(summary(as.factor(mat_tmp_bott$alpha))),
fill = c("#A5260A", 7,11), cex=0.8)
# p_box <- ggplot(box_tmp, aes(x = as.factor(alpha), y = val, group=alpha, color = as.factor(alpha))) +
# geom_boxplot() + labs(title = line-1)
# print(p_box)FALSE
}
dev.off()
## png
## 2
system("convert -delay 120 *.png hist_bottle1.gif")
file.remove(list.files(pattern=".png"))
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE
#plot(vec_test_x, vec_test, type = "b")
###Bottleneck
print(difftime(Sys.time(), time.ini))
## Time difference of 11.2073 mins